# http://www.89ip.cn/
#https://github.com/TM0831/Spiders
"""
         89IP爬虫
         :return:
         """
import time

from bs4 import BeautifulSoup
from lxml import etree
import requests
# HTTP request headers sent with every page fetch; the commented-out
# entries are kept for reference from the original recipe.
headers = {
    'Connection': 'keep-alive',
    # 'Cache-Control': 'max-age=0',
    # 'Accept': 'text/html, */*; q=0.01',
    # 'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
    # 'DNT': '1',
    # 'Accept-Encoding': 'gzip, deflate, sdch',
    # 'Accept-Language': 'zh-CN,zh;q=0.8,ja;q=0.6',
}
def _clean(text):
    """Strip tabs, newlines and spaces from a scraped table cell."""
    return text.replace('\t', '').replace('\n', '').replace(' ', '')


# Accumulated proxy records: one dict per table row across all pages.
Proxies = []
for i in range(1, 6):
    url = "http://www.89ip.cn/index_{}.html".format(i)
    print(url)
    try:
        # timeout= keeps the scraper from hanging forever on a dead host;
        # raise_for_status() turns HTTP errors (4xx/5xx) into exceptions.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
    except requests.RequestException as e:
        # One bad page should not abort the whole run.
        print(e)
        continue
    html = BeautifulSoup(res.text, 'html.parser')
    table = html.find('table', attrs={'class': 'layui-table'})
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        # Layout changed or an error page was served; skip this page.
        print("no proxy table found on {}".format(url))
        continue
    for tr in tbody.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) < 5:  # skip malformed/partial rows
            continue
        ip = _clean(tds[0].text)
        port = _clean(tds[1].text)
        # Site lists plain HTTP proxies of unknown anonymity: keep the
        # original 0/0 flags.
        isHttps = 0
        anonymous = 0
        # NOTE: verifyTime deliberately keeps internal spaces
        # (e.g. "2020-01-01 12:00:00"), unlike ip/port.
        verifyTime = tds[4].text.replace('\t', '').replace('\n', '')
        Proxies.append({'ip': ip, 'port': port, 'anonymous': anonymous,
                        'isHttps': isHttps, 'verifyTime': verifyTime})
    time.sleep(2)  # be polite: throttle between page fetches
from SqlFun import saveKuaidaili
saveKuaidaili(Proxies)
    # --- Alternative parser (unused): lxml/xpath version kept for reference ---
    # try:
    #     et = etree.HTML(res.text)
    #     ip_list = et.xpath('//*[@class="layui-table"]/tbody/tr/td[1]/text()')
    #     port_list = et.xpath('//*[@class="layui-table"]/tbody/tr/td[2]/text()')
    #     ip_list = [i.strip() for i in ip_list]
    #     port_list = [i.strip() for i in port_list]
    #     for ip, port in zip(ip_list, port_list):
    #         print( ip + ":" + port)
    # except Exception as e:
    #     print(e)
#http://www.89ip.cn/tqdl.html?api=1&num=3000&port=&address=&isp=